##### DATA DESCRIPTION AND VALIDATION #########---------------------------------

# Script to generate comparisons between the new V-Dem measure #
# and existing data. The script also generates descriptive insights from the
# new mass mobilization data. #
# This script generates Figures 1,2,3,4,5,6,7,8 and A1,A2,A3,A4,A5,A6,A7 #
# Please note that we do not own the copyright for any external data sources #
# See the final manuscript for a proper citation of other data sets #


# Session info
# R version 4.1.0 (2021-05-18)
# Platform: x86_64-apple-darwin17.0 (64-bit)
# Running under: macOS Big Sur 11.6.8

#-----------------------------Preparation---------------------------------------



# Step 1: Load all data sets that are being compared to the V-Dem data 
# See "Data" folder

#-------------Load Brancati (2016)----------------------------------------------

# load Brancati's data on pro-democracy protests
brancati <-haven::zap_formats(haven::read_dta("Data/Brancati_DataYear.dta")) %>% 
  mutate(cowcode = countrycode(country, "country.name", "cown")) %>%  # convert to cowcode
  drop_na(cowcode) %>% # drop obs with NA for cowcode
  select(cowcode, id:size, rallies) # subset to relevant variables

#-------------Load MMAD V3 by Weidmann and Rød 2019 ----------------------------

# load MMAD V3 event data
load("Data/MMAD_eventsV3.Rda")

# subset to anti-gov events
anti <- events_all %>% 
  filter(side == 1) %>%  # anti-gov-only
  mutate(year = as.numeric(format(event_date, "%Y")), # extract year
         name = countrycode(cowcode, "cown", "country.name")) %>% # get country_name
  select(cowcode, name, year, mean_avg_numparticipants)  %>% 
  group_by(cowcode,name, year) %>% # dplyr::summarise events at the year level
  dplyr::summarise(events_anti = length(cowcode),
                   participants_anti = sum(mean_avg_numparticipants, na.rm = T)) %>% 
  complete(., year = 2003:2018, fill = list(events_anti = 0,
                                            participants_anti = 0)) %>% # Fill empty rows with 0
  ungroup 

# subset to pro-gov events
pro <- events_all %>% 
  filter(side == 0) %>% # pro-gov-only
  mutate(year = as.numeric(format(event_date, "%Y")), # extract year
         name = countrycode(cowcode, "cown", "country.name")) %>%  # get country_name
  select(cowcode, name, year, mean_avg_numparticipants)  %>% 
  group_by(cowcode,name, year) %>% 
  dplyr::summarise(events_pro = length(cowcode), # dplyr::summarise events at the year level
                   participants_pro = mean(mean_avg_numparticipants, na.rm = T)) %>% 
  complete(., year = 2003:2018, fill = list(events_pro = 0,
                                            participants_pro = 0)) %>% # Fill empty rows with 0
  ungroup 

#-------------Load NAVCO 2.1----------------------------------------------------

# Load NAVCO 2.1 data
navco2 <- rio::import("Data/NAVCO2-1_ForPublication.xls")

navco_sub <- navco2 %>% 
  filter(camp_goals != -99 & camp_goals %in% c(0,1,2)) %>% # Subset to relevant goals
  select(country_id = loc_vdem, year, camp_size_cat, prim_meth, camp_goals)  %>% 
  group_by(country_id, year) %>% # aggregate at the year level
  dplyr::summarise(size_max = max(camp_size_cat, na.rm = T),
                   prim_meth = max(prim_meth, na.rm = T),
                   camp_goals = min(camp_goals, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(size_max = ifelse(is.infinite(size_max) | size_max == -99, 0, size_max),
         campaign = 1)

#-------------Load V-Dem data (Coppedge et al. 2022)----------------------------

# Load V-Dem data
vdem <-rio::import("Data/V-Dem-CY-Full+Others-v12.csv")

# subset to relevant variables
vdem_sub <- vdem %>%
  select(year, cowcode = COWcode, country_text_id, country_name, country_id,
         v2x_regime, v2x_polyarchy, v2x_libdem,
         v2cademmob, v2cademmob_osp_codelow, v2cademmob_osp_codehigh, v2cacamps,
         v2cademmob_nr, v2cademmob_osp, v2cademmob_ord, v2cademmob_sd,v2cagenmob,
         v2caautmob, v2caautmob_nr, v2caautmob_osp, v2caautmob_ord,
         v2caautmob_sd, v2caautmob_osp_codelow, v2caautmob_osp_codehigh,
         e_pop, v2mecenefm, e_area) %>% 
  filter(!is.na(v2cademmob) & !is.na(v2caautmob))

#-------------Load Carnegie Tracker---------------------------------------------
carnegie <- read_csv("Data/Global Protest Tracker - View Data.csv") %>% # read data
  janitor::clean_names() %>% # clean names
  drop_na(country) %>% # drop obs with NA for country
  mutate(cowcode = countrycode(country, "country.name", "cown"),
         year = as.numeric(paste0("20", str_sub(start_date, -2, -1)))) %>% # add year
  group_by(cowcode, year) %>% # Aggregate at year level
  summarise(protest_name = paste0(protest_name, collapse = ", ")) %>% 
  drop_na(cowcode) %>% 
  ungroup() 

#-------------Load Beissinger data on revolutions-------------------------------
revol_goals <- readxl::read_xlsx("Data/Revolutionary episodes_v_1.0.xlsm",
                                 sheet = "2-Goals")

revols_general <- readxl::read_xlsx("Data/Revolutionary episodes_v_1.0.xlsm",
                                    sheet = "1-Timing & location") 

# merhe both data sets and subset to relevant variables
revols <- left_join(revol_goals, revols_general) %>% 
  select(revid, nameofrevolution, year = startyear,  democrat, leftist,
         rightwing, anticolonial, antimonarch, cowcode, isoabb)

#------------- Belarus example (Figure 1) --------------------------------------------

# Subset to Belarus and democracy mobilization
belarus_dem <- vdem_sub %>% filter(country_name == "Belarus" & year > 1999) %>% 
  select(year, v2cademmob_osp, v2cademmob_osp_codelow, v2cademmob_osp_codehigh) %>% 
  mutate(var = "demmob")

# Subset to Belarus and autocracy mobilization
belarus_aut <- vdem_sub %>% filter(country_name == "Belarus" & year > 1999) %>% 
  select(year, v2caautmob_osp, v2caautmob_osp_codelow, v2caautmob_osp_codehigh) %>% 
  mutate(var = "autmob")

# Combine both
belarus <- cbind(belarus_aut, belarus_dem) %>% 
  janitor::clean_names()

# Plot Belarus with credible intervals
ggplot(data = belarus, aes(x = year)) +
  geom_ribbon(aes(ymin = v2cademmob_osp_codelow,
                  ymax = v2cademmob_osp_codehigh), fill = "#08519C",  alpha = .5) +
  geom_ribbon(aes(ymin = v2caautmob_osp_codelow,
                  ymax = v2caautmob_osp_codehigh), fill = "#A50F15", alpha = .5) +
  geom_line(aes(y = v2cademmob_osp)) +
  geom_point(aes(y = v2cademmob_osp), shape = 16, size = 2.5) +
  geom_line(aes(y = v2caautmob_osp)) +
  geom_point(aes(y = v2caautmob_osp), shape = 17, size = 2.5) +
  scale_y_continuous("Mass Mobilization (original scale)", limits = c(0,4)) +
  ggtitle("") +
  theme_bw()

ggsave(filename = "output/Figure_1.pdf",
       width = 7, height = 5, dpi = "retina")

#-------------Comparison with Brancati (Figure 2)-------------------------------

# Merge V-Dem and Brancati
brancati_vdem <- left_join(brancati, vdem_sub, by = c("cowcode", "year")) %>% 
  mutate(some_events = ifelse(v2cademmob_ord > 0, 1, 0),
         overlap = ifelse(some_events == 1 & protest == 1, 1, 0),
         no_overlap = ifelse(some_events == 1 & protest == 0, 1, 0))

# Cross tab
brancati_tab <- xtabs(~ some_events + protest, data = brancati_vdem)

# Relabel 
names(dimnames(brancati_tab)) <- c("Mobilization for democracy", "Brancati (2016)")
names(rownames(brancati_tab)) <- c("V-Dem", "Brancati (2016)")

rownames(brancati_tab) <- c("No events", "Several or more events")
colnames(brancati_tab) <- c("No events", "One or more events")

# Mosaic plot
pdf(file = "output/Figure_2.pdf", height = 7, width = 9)
vcd::mosaic(brancati_tab, 
            gp =  vcd::shading_hcl,
            labeling = vcd::labeling_values)
dev.off()

#-------------Comparison with Beissinger (Figure 3)--------------------------------------------
# Merge with V-Dem
vdem_revol <- left_join(revols, vdem_sub)

# Plot mass mobilization across goals A
ggplot(vdem_revol, aes(x = factor(antimonarch), y = v2cademmob_osp)) +
  geom_jitter(width = 0.25, alpha = .2) +
  geom_boxplot(alpha = .8) +
  geom_signif(comparisons = list(c("0", "1")),
              map_signif_level = TRUE,
              na.rm = T) +
  ylab("Mobilization for democracy") +
  xlab("Goal: Anti-monarchical")

ggsave("Output/Figure_3b.pdf",
       width = 7,
       height = 5,
       dpi = "retina") 

# Plot mass mobilization across goals B
ggplot(vdem_revol, aes(x = factor(democrat), y = v2cademmob_osp)) +
  geom_jitter(width = 0.25, alpha = .2) +
  geom_boxplot(alpha = .8) +              # Add p-value to plot
  geom_signif(comparisons = list(c("0", "1")),
              map_signif_level = TRUE,
              na.rm = T) +
  ylab("Mobilization for democracy") +
  xlab("Goal: Democratization")

ggsave("Output/Figure_3a.pdf",
       width = 7,
       height = 5,
       dpi = "retina") 

#-------------Comparison with Carnegie (Figure 4)-------------------------------

# Subset V-Dem data
vdem_sub2017 <- vdem_sub %>% 
  filter(year > 2016)

# Merge Carnegie and V-Dem
vdem_carnegie <- left_join(vdem_sub2017, carnegie, by = c("cowcode", "year"))

# Rename protest issues
vdem_carnegie <- vdem_carnegie %>% 
  mutate(carnegie = ifelse(is.na(protest_name), 0, 1),
         protest_name = case_when(protest_name == "Cybersecurity / special economic zones protests" ~
                                    "Cybersecurity/ \n economic zones protests",
                                  protest_name == "Coronavirus lockdown protest, Coronavirus protest" ~
                                    "Coronavirus protest",
                                  protest_name == "Racial equality protests" ~ "George Floyd solidarity protests",
                                  protest_name == "Corruption protests, Coronavirus lockdown protest" ~ "Corruption, Coronavirus protests",
                                  T ~ protest_name
                                  
         ))

# Identify top 10 mass mob values in Carnegie
top10 <- vdem_carnegie %>% filter(carnegie == 1) %>% 
  select(country_name, country_text_id, year, protest_name, contains("v2cademmob_osp")) %>% 
  arrange(-v2cademmob_osp) %>% 
  slice(1:10) %>% 
  mutate(name_new = paste0(protest_name, " (",country_name, " ",year, ") " ),
         name_new =  iconv(name_new, "UTF-8", "ASCII", sub = ""))

# Identify bottom 10 mass mob values in Carnegie
bottom10 <- vdem_carnegie %>% filter(carnegie == 1) %>% 
  select(country_name, country_text_id, year, protest_name, contains("v2cademmob_osp")) %>% 
  arrange(v2cademmob_osp) %>% 
  slice(1:10) %>% 
  mutate(name_new = paste0(protest_name, " (",country_name, " ",year, ") " ),
         name_new =  iconv(name_new, "UTF-8", "ASCII", sub = ""))

# Plot top 10 protests
ggplot(top10, aes(x = reorder(name_new, v2cademmob_osp),  y = v2cademmob_osp)) +
  geom_point() +
  geom_errorbar(aes(ymin= v2cademmob_osp_codelow,
                    ymax= v2cademmob_osp_codehigh), colour="black", width=.1) +
  scale_x_discrete("", labels = function(x) str_wrap(x, width = 30)) +
  scale_y_continuous("Mobilization for democracy", limits = c(3.25,4),
                     breaks = seq(3,4,.25), labels = seq(3,4,.25)) +
  coord_flip()+
  theme_bw()

ggsave("output/Figure_4b.pdf",
       width = 7,
       height = 5,
       dpi = "retina") 

# Plot bottom 10 protests
ggplot(bottom10, aes(x = reorder(name_new, v2cademmob_osp),  y = v2cademmob_osp)) +
  geom_point() +
  geom_errorbar(aes(ymin= v2cademmob_osp_codelow,
                    ymax= v2cademmob_osp_codehigh), colour="black", width=.1) +
  scale_x_discrete("", labels = function(x) str_wrap(x, width = 30)) +
  scale_y_continuous("Mobilization for democracy", limits = c(0,.75),
                     breaks = seq(0,1,.25), labels = seq(0,1,.25)) +
  coord_flip() +
  theme_bw()

ggsave("output/Figure_4a.pdf",
       width = 7,
       height = 5,
       dpi = "retina") 

#-------------Mass mobilization over time (Figures 5 and 6)------------------
min_year = 1900
max_year = 2021

V_dem_plots <- vdem %>%
  select(year, country_name, country_id, country_text_id, cowcode = COWcode, e_regiongeo, e_regionpol, e_regionpol_6C,
         v2x_regime, v2x_polyarchy, v2cademmob_osp, v2caautmob_osp) %>% 
  dplyr::filter(year >= min_year & year <= max_year) %>%
  tidyr::pivot_longer(starts_with("v2ca"), names_to = "indicator", values_to = "value") %>% 
  tidyr::drop_na(value) %>% 
  dplyr::mutate(region = recode(e_regionpol_6C, 
                                `1`="Eastern Europe and Central Asia",
                                `2`="Latin America and the Caribbean",
                                `3`="MENA",
                                `4`="Sub-Saharan Africa",
                                `5`="Western Europe and North America",
                                `6`="Asia and Pacific"),
                regime_type = recode(v2x_regime, 
                                     `0`="Closed autocracy",
                                     `1`="Electoral autocracy",
                                     `2`="Electoral democracy",
                                     `3`="Liberal democracy"))


# Plot average mobilization scores over time
ggplot2::ggplot(V_dem_plots,
                aes(x = year, y = value, group = indicator,
                    color = indicator, linetype = indicator)) +
  stat_smooth(method = "loess", span = 0.05, se=TRUE, alpha = 0.3) +
  scale_x_continuous("",breaks = seq(round(min_year / 10) * 10, round(max_year / 10) * 10, 10)) +
  scale_y_continuous("Mass mobilization", breaks = seq(0,2,0.5), limits = c(0,NA)) +
  scale_color_manual("", breaks = c("v2caautmob_osp", "v2cademmob_osp"),
                     values = c("#A50F15", "#08519C"),
                     labels = c("Mobilization for autocracy", "Mobilization for democracy")) +
  scale_linetype_manual("", breaks = c("v2caautmob_osp", "v2cademmob_osp"),
                        values = c("dashed", "solid"),
                        labels = c("Mobilization for autocracy", "Mobilization for democracy")) +
  theme_bw() + theme(legend.position = "bottom",
                     legend.key.width = unit(2,"line")) + 
  annotate(geom = "curve", x = 1985, y = 1.8, xend = 1990, yend = 1.63, 
           curvature = .1, arrow = arrow(length = unit(1, "mm"))) +
  annotate(geom = "curve", x = 2009, y = 2.1, xend = 2011, yend = 1.76, 
           curvature = -.1, arrow = arrow(length = unit(1, "mm"))) +
  annotate(geom = "curve", x = 2015, y = 1.45, xend = 2020, yend = 1.62, 
           curvature = -.001, arrow = arrow(length = unit(1, "mm"))) +
  annotate(geom = "text", x = 1975, y = 1.85, label = "Collapse of the SU", hjust = "left", size = 2.5) +
  annotate(geom = "text", x = 2003, y = 2.13, label = "Arab uprisings", hjust = "left", size = 2.5) +
  annotate(geom = "text", x = 2005, y = 1.43, label = "Covid-19 pandemic", hjust = "left", size = 2.5) 

ggsave("output/Figure_5.pdf", plot = last_plot(), 
       width = 7,
       height = 5,
       limitsize = TRUE)

# Plot average mobilization scores over time by region
ggplot2::ggplot(V_dem_plots,
                aes(x = year, y = value, group = indicator, color = indicator, linetype = indicator)) +
  stat_smooth(method = "loess", span = 0.06, se=TRUE, alpha = 0.3) +
  xlab("") +  ylab("Mass mobilization") +
  scale_x_continuous(breaks = seq(round(min_year / 10) * 10, round(max_year / 10) * 10, 10)) +
  scale_color_manual("", breaks = c("v2caautmob_osp", "v2cademmob_osp"),
                     values = c("#A50F15", "#08519C"),
                     labels = c("Mobilization for autocracy", "Mobilization for democracy")) +
  scale_linetype_manual("", breaks = c("v2caautmob_osp", "v2cademmob_osp"),
                        values = c("dashed", "solid"),
                        labels = c("Mobilization for autocracy", "Mobilization for democracy")) +
  theme_bw() + theme(legend.position = "bottom",
                     legend.key.width = unit(2,"line"),
                     axis.text.x = element_text(angle = 45,hjust=1)) +
  facet_wrap(~ region)

ggsave("output/Figure_6.pdf", plot = last_plot(), 
       width = 8,
       height = 6,
       limitsize = TRUE)


#-------------Mobilization and democracy (Figures 7)----------------------
min_year = 1900
max_year = 2021

# Subset V-Dem data
V_dem_plots2 <- vdem %>%
  select(year, country_name, country_id, country_text_id, cowcode = COWcode, e_regiongeo, e_regionpol, e_regionpol_6C,
         v2x_regime, e_boix_regime, v2x_polyarchy, v2x_libdem, v2cademmob, v2cademmob_osp, v2caautmob, v2caautmob_osp, v2cagenmob_osp) %>% 
  dplyr::filter(year >= min_year & year <= max_year) %>% 
  arrange(country_name, year) %>% 
  group_by(country_name) %>% 
  mutate(polyarchy_diff = v2x_polyarchy - lag(v2x_polyarchy),
         v2x_polyarchy_lag = lag(v2x_polyarchy),
         libdem_diff = v2x_libdem - lag(v2x_libdem),
         pro_dem_diff = v2cademmob_osp - lag(v2cademmob_osp),
         pro_aut_diff = v2caautmob_osp - lag(v2caautmob_osp),
         pro_dem_lag = lag(v2cademmob_osp),
         mass_mob_lag = lag(v2cagenmob_osp),
         pro_aut_lag = lag(v2caautmob_osp),
         regime_lag = lag(v2x_regime),
         bmr_lag = lag(e_boix_regime)) %>% 
  ungroup()

# Plot bivariate relationship between pro-democracy mobilization and electoral democracy
ggplot(V_dem_plots2, aes(x = v2x_polyarchy, y = v2cademmob_osp)) + 
  geom_point(alpha = 0.25, shape=1, color = "grey70") +
  stat_smooth(method = "loess", span = 0.5, se = TRUE, alpha = 0.85, color = "#08519C") +
  scale_x_continuous(limits = c(0,1), breaks = seq(0, 1.0, 0.2)) +
  xlab("Electoral Democracy Index") + ylab("Mass mobilization for democracy") + 
  theme_bw()

ggsave("output/Figure_7a.pdf", plot = last_plot(), 
       width = 6,
       height = 5,
       limitsize = TRUE)

# Plot bivariate relationship between pro-autocracy mobilization and electoral democracy
ggplot(V_dem_plots2, aes(x = v2x_polyarchy, y = v2caautmob_osp)) + 
  geom_point(alpha = 0.25, shape=1, color = "grey70") +
  stat_smooth(method = "loess", span = 0.5, se = TRUE, alpha = 0.75, color = "#A50F15") +
  scale_x_continuous(limits = c(0,1), breaks = seq(0, 1.0, 0.2)) +
  xlab("Electoral Democracy Index") + ylab("Mass mobilization for autocracy") + 
  theme_bw()

ggsave("output/Figure_7b.pdf", plot = last_plot(), 
       width = 6,
       height = 5,
       limitsize = TRUE)

#----------- Plot changes in democracy against mobilization (Figures 8)---------

# Plot changes in democracy against mobilization
p1 <- ggplot(V_dem_plots2, aes(x = pro_dem_lag, y = polyarchy_diff,
                               group = factor(regime_lag),
                               color = factor(regime_lag),
                               linetype = factor(regime_lag))) + 
  stat_smooth(aes(fill = factor(regime_lag)), method = "lm", 
              span = 0.50, se = T, alpha = .25) +
  scale_x_continuous("Mobilization for democracy (t-1)",
                     limits = c(0,4), breaks = seq(0, 4, 1)) +
  scale_y_continuous("Change in the EDI",  breaks = seq(-0.03,.04, .01)) +
  scale_color_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                          "Electoral autocracy",
                                                                          "Electoral democracy",
                                                                          "Liberal democracy"),
                     values = c("#e66101", "#fdb863", "#b2abd2", "#5e3c99")) +
  scale_fill_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                         "Electoral autocracy",
                                                                         "Electoral democracy",
                                                                         "Liberal democracy"),
                    values = c("#e66101", "#fdb863", "#b2abd2", "#5e3c99")) +
  scale_linetype_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                             "Electoral autocracy",
                                                                             "Electoral democracy",
                                                                             "Liberal democracy"),
                        values = c(1,2,3,4)) +
  coord_cartesian(
    ylim = c(-0.03,0.02)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "black") +
  theme_bw() 


p2 <- ggplot(V_dem_plots2, aes(x = pro_aut_lag, y = polyarchy_diff, group = factor(regime_lag), color = factor(regime_lag), linetype = factor(regime_lag))) + 
  #  geom_point(color = "darkgrey", alpha = .55) +
  stat_smooth(aes(fill = factor(regime_lag)), method = "lm",  span = 0.60, se = T, alpha = .15) +
  scale_x_continuous("Mobilization for autocracy (t-1)", limits = c(0,4), breaks = seq(0, 4, 1)) +
  scale_y_continuous("Change in the EDI",  breaks = seq(-0.03,.04, .01)) +
  scale_color_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                          "Electoral autocracy",
                                                                          "Electoral democracy",
                                                                          "Liberal democracy"),
                     values = c("#e66101", "#fdb863", "#b2abd2", "#5e3c99")) +
  scale_linetype_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                             "Electoral autocracy",
                                                                             "Electoral democracy",
                                                                             "Liberal democracy"),
                        values = c(1,2,3,4)) +
  scale_fill_manual("Regime type (t-1)", breaks = c(0,1,2,3), labels = c("Closed autocracy",
                                                                         "Electoral autocracy",
                                                                         "Electoral democracy",
                                                                         "Liberal democracy"),
                    values = c("#e66101", "#fdb863", "#b2abd2", "#5e3c99")) +
  coord_cartesian(
    ylim = c(-0.03,0.02)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "black") +
  theme_bw() 

ggpubr::ggarrange(p1, p2, ncol=2, nrow=1, common.legend = TRUE, legend="bottom")

ggsave("output/Figure_8.pdf", plot = last_plot(), 
       width = 7,
       height = 5,
       limitsize = TRUE)

#-------------Comparison with MMAD (Figure A1,A2,A3)---------------------------

# Merge MMAD and V-Dem

# anti-gov protest first
vdem_anti <- left_join(vdem_sub, anti, by = c("cowcode", "year"))

# pro-gov protest second
vdem_mmad <- left_join(vdem_anti, pro, by = c("cowcode", "year"))

# adjust MMAD sample, kick out periods that are not coded according to codebook
# Note that this is a broad brushed approach to retain with full year observations 
# in both data sets.
vdem_mmad <- vdem_mmad %>% 
  select(-contains("name.")) %>% 
  drop_na(events_anti) %>% # remove obs with no match in MMAD
  filter(!(country_name == "Afghanistan" & (year < 2010 | year %in% c(2014,2015))) &
           !(country_name == "Bahrain" & (year < 2016)) & 
           !(country_name == "Bangladesh" & (year < 2013 )) & 
           !(country_name == "Botswana" & (year > 2015)) &
           !(country_name == "Burkina Faso" & (year > 2015)) &
           !(country_name == "Burundi" & (year < 2013 )) & 
           !(country_name == "Equatorial Guinea" & (year < 2016 )) & 
           !(country_name == "Egypt" & (year == 2012)) & 
           !(country_name == "The Gambia" & (year > 2017)) & 
           !(country_name == "Georgia" & (year > 2003)) & 
           !(country_name == "Guinea" & (year > 2010 & year < 2016)) & 
           !(country_name == "Guinea-Bissau") & 
           !(country_name == "Haiti" & (year < 2016)) & 
           !(country_name == "Honduras" & (year < 2016)) & 
           !(country_name == "Iraq" & (year < 2017)) & 
           !(country_name == "Ivory Coast" & (year > 2015)) &
           !(country_name == "Kyrgyzstan" & ((year > 2012 & year < 2016) | year == 2018)) & 
           !(country_name == "Kenya" & (year < 2017)) & 
           !(country_name == "Lebanon") &
           !(country_name == "Lesotho") &
           !(country_name == "Liberia") & 
           !(country_name == "Libya" & (year > 2012)) & 
           !(country_name == "Madagascar" & (year < 2009 | year %in% c(2014,2015))) & 
           !(country_name == "Nicaragua" & (year < 2016)) &
           !(country_name == "North Macedonia") &
           !(country_name == "Namibia" & (year > 2015)) &
           !(country_name == "Nepal" & (year > 2006)) & 
           !(country_name == "Niger") &
           !(country_name == "Pakistan" & (year > 2008 & year < 2016)) & 
           !(country_name == "Papua New Guinea" & (year < 2016)) &
           !(country_name == "Qatar" & (year < 2016)) &
           !(country_name == "Serbia" & (year < 2016)) &
           !(country_name == "Somalia" & (year < 2016)) &
           !(country_name == "South Sudan" & (year < 2012)) & 
           !(country_name == "Sri Lanka") &
           !(country_name == "Syria" & (year > 2012 & year < 2016)) & 
           !(country_name == "Tanzania" & (year > 2017)) &
           !(country_name == "Thailand" & (year < 2014)) & 
           !(country_name == "Togo" & (year == 2016)) & 
           !(country_name == "Tunisia" & (year > 2012)) &
           !(country_name == "Turkey" & (year < 2016)) &
           !(country_name == "Ukraine" & (year < 2016)) &
           !(country_name == "Venezuela" & (year < 2006)) & 
           !(country_name == "Yemen" & (year > 2012))) %>% 
  mutate(events_anti_mio = (events_anti * participants_anti)/e_pop,
         events_anti_quart = as.numeric(Hmisc::cut2(events_anti_mio, g = 6)) - 1,
         participants_anti_quart = as.numeric(Hmisc::cut2(participants_anti, g = 6)) - 1,
         events_pro_mio = (events_pro * participants_pro)/e_pop,
         events_pro_quart = as.numeric(Hmisc::cut2(events_pro_mio, g = 6)) - 1,
         participants_pro_quart = as.numeric(Hmisc::cut2(participants_pro, g = 6)) - 1) 

# Figure A1a: Relationship between MMAD and pro-democracy participants
pA1a <- ggplot(vdem_mmad, aes(x = v2cademmob_osp, y =  I(log(participants_anti + 1)))) +
  geom_point(alpha = .65, color = "grey60") +
  geom_smooth(method = "lm", color = "#08519C") +
  ylab("Anti-government participants (log)") +
  xlab("Mobilization for Democracy") +
  theme_bw() +
  ggpubr::stat_cor(method = "pearson",
                   label.x = min(vdem_mmad$v2cademmob_osp),
                   label.y = max(I(log(vdem_mmad$participants_anti + 1)), na.rm = T)) +
  theme(panel.grid.minor = element_blank())

print(pA1a)

#ggsave("output/Figure_A1a.pdf",
#       width = 7,
#       height = 5,
#       dpi = "retina")

# Figure A1b: Relationship between MMAD and pro-autocracy participants
pA1b <- ggplot(vdem_mmad, aes(x = v2caautmob_osp, y =  I(log(participants_pro + 1)))) +
  geom_point(alpha = .65, color = "grey60") +
  geom_smooth(method = "lm", color = "#A50F15") +
  ylab("Pro-government participants (log)") +
  xlab("Mobilization for Autocracy") +
  theme_bw() +
  ggpubr::stat_cor(method = "pearson",
                   label.x = min(vdem_mmad$v2caautmob_osp),
                   label.y = max(I(log(vdem_mmad$participants_pro + 1)), na.rm = T)) +
  theme(panel.grid.minor = element_blank())

print(pA1b)

#ggsave("output/Figure_A1b.pdf",
#       width = 7,
#       height = 5,
#       dpi = "retina")

ggpubr::ggarrange(pA1a,pA1b, ncol = 1, nrow = 2) 

ggsave("output/Figure_A1.pdf",
       width = 6,
       height = 6,
       dpi = "retina")

# country trajectories for V-Dem and MMAD

#select country TUNISIA
vdem_country <- vdem_mmad  %>%
  filter(country_name == "Tunisia") %>% 
  select(country_name, year, events_anti, events_pro, v2cademmob_osp, v2caautmob_osp) %>% 
  pivot_longer(cols = c(events_anti, events_pro, v2cademmob_osp, v2caautmob_osp),
               names_to = "measurement") %>% 
  mutate(value_log = log(value + 1))


# Figure A2: Plot both time series
pA2a <- ggplot(vdem_country %>% filter(measurement %in% c("events_anti", "events_pro")),
               aes(year, value, group = measurement, color = measurement, linetype = measurement)) +
  geom_line(size = .7) +
  geom_point(size = 1) +
  scale_y_continuous(
    limits = c(0, I(max(vdem_country$value) + 25)), expand = c(0, 0),
    breaks = seq(0,I(max(vdem_country$value) + 25),25),
    name = "Protest events (MMAD)"
  ) + 
  scale_x_continuous(name = "", expand = c(0, 0),
                     limits = c(min(vdem_country$year), 2020),
                     breaks = seq(min(vdem_country$year),2020,5)) +
  scale_color_manual("", breaks = c("events_anti", "events_pro"),
                     values = c("#08519C","#A50F15"), labels = c("anti-government", "pro-government")) +
  scale_linetype_manual("", breaks = c("events_anti", "events_pro"),
                        values = c("solid", "dashed"),
                        labels = c("anti-government", "pro-government")) +
  coord_cartesian(clip = "off") +
  theme(
    axis.line = element_blank(),
    plot.margin = margin(12, 1.5, 0, 1.5)
  ) +
  theme_bw()

print(pA2a)

pA2b <- ggplot(vdem_country %>% filter(measurement %in% c("v2cademmob_osp", "v2caautmob_osp")),
               aes(year, value, group = measurement, color = measurement, linetype = measurement)) +
  geom_line(size = .7) +
  geom_point(size = 1) +
  scale_y_continuous(
    limits = c(round(min(vdem_country$value), 0), 4), expand = c(0, 0),
    breaks = seq(round(min(vdem_country$value), 0),4,.5),
    name = "Mass Mobilization (V-Dem)"
  ) + 
  scale_x_continuous(name = "", expand = c(0, 0),
                     limits = c(min(vdem_country$year), 2020),
                     breaks = seq(min(vdem_country$year),2020,5)) +
  scale_color_manual("", breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                     values = c("#08519C","#A50F15"), 
                     labels = c("pro-democracy", "pro-autocracy")) +
  scale_linetype_manual("", breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                        values = c("solid", "dashed"),
                        labels = c("pro-democracy", "pro-autocracy")) +
  coord_cartesian(clip = "off") +
  theme(
    axis.line = element_blank(),
    plot.margin = margin(12, 1.5, 0, 1.5)
  ) + ggtitle("Tunisia") +
  theme_bw()

print(pA2b)

cowplot::plot_grid(pA2b, pA2a, align = 'v', ncol = 1) 

ggsave("output/Figure_A2.pdf",
       width = 7,
       height = 5,
       dpi = "retina")

#select country VENEZUELA
vdem_country <- vdem_mmad  %>%
  filter(country_name == "Venezuela") %>% 
  select(country_name, year, events_anti, events_pro, v2cademmob_osp, v2caautmob_osp) %>% 
  pivot_longer(cols = c(events_anti, events_pro, v2cademmob_osp, v2caautmob_osp),
               names_to = "measurement") %>% 
  mutate(value_log = log(value + 1))


# Figure A3: Plot both time series
pA3a <- ggplot(vdem_country %>% filter(measurement %in% c("events_anti", "events_pro")),
               aes(year, value, group = measurement, color = measurement, linetype = measurement)) +
  geom_line(size = .7) +
  geom_point(size = 1) +
  scale_y_continuous(
    limits = c(0, I(max(vdem_country$value) + 25)), expand = c(0, 0),
    breaks = seq(0,I(max(vdem_country$value) + 25),25),
    name = "Protest events (MMAD)"
  ) + 
  scale_x_continuous(name = "", expand = c(0, 0),
                     limits = c(min(vdem_country$year), 2020),
                     breaks = seq(min(vdem_country$year),2020,5)) +
  scale_color_manual("", breaks = c("events_anti", "events_pro"),
                     values = c("#08519C","#A50F15"), labels = c("anti-government", "pro-government")) +
  scale_linetype_manual("", breaks = c("events_anti", "events_pro"),
                        values = c("solid", "dashed"),
                        labels = c("anti-government", "pro-government")) +
  coord_cartesian(clip = "off") +
  theme(
    axis.line = element_blank(),
    plot.margin = margin(12, 1.5, 0, 1.5)
  ) +
  theme_bw()

print(pA3a)

pA3b <- ggplot(vdem_country %>% filter(measurement %in% c("v2cademmob_osp", "v2caautmob_osp")),
               aes(year, value, group = measurement, color = measurement, linetype = measurement)) +
  geom_line(size = .7) +
  geom_point(size = 1) +
  scale_y_continuous(
    limits = c(round(min(vdem_country$value), 0), 4), expand = c(0, 0),
    breaks = seq(round(min(vdem_country$value), 0),4,.5),
    name = "Mass Mobilization (V-Dem)"
  ) + 
  scale_x_continuous(name = "", expand = c(0, 0),
                     limits = c(min(vdem_country$year), 2020),
                     breaks = seq(min(vdem_country$year),2020,5)) +
  scale_color_manual("", breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                     values = c("#08519C","#A50F15"), 
                     labels = c("pro-democracy", "pro-autocracy")) +
  scale_linetype_manual("", breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                        values = c("solid", "dashed"),
                        labels = c("pro-democracy", "pro-autocracy")) +
  coord_cartesian(clip = "off") +
  theme(
    axis.line = element_blank(),
    plot.margin = margin(12, 1.5, 0, 1.5)
  ) + ggtitle("Venezuela") +
  theme_bw()

print(pA3b)

cowplot::plot_grid(pA3b, pA3a, align = 'v', ncol = 1) 

ggsave("output/Figure_A3.pdf",
       width = 7,
       height = 5,
       dpi = "retina")

#-------------Comparison with NAVCO 2.1 (Figure A4)-----------------------------
# merge V-Dem and Navco
vdem_navco <- left_join(vdem_sub, navco_sub, by = c("country_id", "year"))

#subset to coverage of NAVCO in order to have a comparable sample
vdem_navco_sub <- vdem_navco %>% 
  filter(year %in% seq(1945,2013,1)) %>% 
  mutate(some_events = ifelse(v2cademmob_ord > 0, 1, 0),
         campaign = ifelse(is.na(campaign), 0, campaign),
         overlap = ifelse(some_events == 1 & campaign == 1, 1, 0))

# Cross tab
navco_tab <- xtabs(~ some_events + campaign, data = vdem_navco_sub)

# Relabel dimensions
names(dimnames(navco_tab)) <- c("Mobilization for Democracy", "NAVCO 2.1")
names(rownames(navco_tab)) <- c("V-Dem", "NAVCO 2.1")

rownames(navco_tab) <- c("No events", "Several or more events")
colnames(navco_tab) <- c("No campaign", "Campaign year")

# Mosaic plot
pdf(file = "output/Figure_A4.pdf", height = 7, width = 9)
mosaic(navco_tab,     
       gp =  shading_hcl,
       labeling = labeling_values)
dev.off()

#-------------Coder level / uncertainty analysis (Figure A5)--------------------------------------------

data <- vdem_sub %>% 
  mutate(time_since_1900 = (year - 1900) / 10)

# check predictors of disagreement
demmob_sd <- lme4::lmer(v2cademmob_sd ~ I(v2x_polyarchy*10) + I(v2mecenefm*-1) + v2cacamps +
                          log(e_pop) +  v2cademmob_nr + time_since_1900 +
                          (1|country_name),
                        data = data)

# check predictors of disagreement
autmob_sd <- lme4::lmer(v2caautmob_sd ~ I(v2x_polyarchy*10) + I(v2mecenefm*-1) + v2cacamps +
                          log(e_pop) + v2caautmob_nr + time_since_1900 +
                          (1|country_name),
                        data = data)

cm <- c('time_since_1900' = 'Time since 1900',
        'v2cacamps' = 'Polarization',
        'v2cademmob_nr' = 'Number of coders', 
        'v2caautmob_nr' = 'Number of coders', 
        'log(e_pop)' = 'Population size (log)',
        'I(v2mecenefm * -1)' = 'Media censorship',
        'I(v2x_polyarchy * 10)' = 'Electoral Democracy Index')

models <- list(
  "Mobilization for autocracy" = autmob_sd,
  "Mobilization for democracy" = demmob_sd
)

dat <- modelplot(models, coef_omit = 'Interc', coef_map = cm,
                 draw = F)

ggplot(dat, aes( y = term, x = estimate,
                 xmin = conf.low, xmax = conf.high,
                 color = model, shape = model)) +
  geom_point(size = 2) +
  geom_linerange(size = 1) +
  scale_color_manual("", breaks = c("Mobilization for democracy",
                                    "Mobilization for autocracy" ),
                     values = c("#08519C", "#A50F15")) +
  scale_shape_manual("", breaks = c("Mobilization for democracy",
                                    "Mobilization for autocracy" ),
                     values = c(17,19)) +
  geom_vline(xintercept = 0, lty = "dashed") +
  theme_bw() +
  ylab("") +
  xlab("Estimate") +
  theme(panel.grid.major.y = element_blank())

ggsave(filename = "output/figure_A5.pdf",
       width = 7, height = 5, dpi = "retina")

#-------------Maps of Mobilization in 2021 (Figures A6 and A7)------------------

vdem_sub_2021 <- vdem_sub %>% 
  filter(year == 2021) 

map <- joinCountryData2Map(vdem_sub_2021,
                           joinCode = "ISO3",
                           nameJoinColumn = "country_text_id",
                           verbose = T)

#Exclude Antarctica
new_world <- subset(map, continent != "Antarctica")

#Define color palette
colors <- RColorBrewer::brewer.pal(n = 8, name = "Blues")

png(file="output/Figure_A6.png", width = 465, height = 295, units='mm', res = 300)
par(mar=c(1,1,1,0)) #(bottom, left, top, right)
mapParams <- mapCountryData(new_world, nameColumnToPlot = "v2cademmob_ord",
                            mapTitle = "",
                            colourPalette = colors,
                            catMethod = "pretty",
                            addLegend = F,
                            missingCountryCol = "#dddddd")

do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 4, legendLabels = "all"))
dev.off()

#Define color palette
colors <- RColorBrewer::brewer.pal(n = 8, name = "Reds")

png(file="output/Figure_A7.png", width = 465, height = 295, units='mm', res = 300)
par(mar=c(1,1,1,0)) #(bottom, left, top, right)
mapParams <- mapCountryData(new_world, nameColumnToPlot = "v2caautmob_ord",
                            mapTitle = "",
                            colourPalette = colors,
                            catMethod = "pretty",
                            addLegend = F,
                            missingCountryCol = "#dddddd")

do.call(addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 4, legendLabels = "all"))
dev.off()



#-------------Plot all country trajectories (Appendix C)-------------------------------------------

data_long <- vdem_sub %>% 
  dplyr::select(country_name, year, v2cademmob_osp, v2caautmob_osp) %>% 
  group_by(country_name) %>%
  arrange(country_name, year) %>% 
  complete(., year = 1900:2020) %>% 
  ungroup() %>% 
  pivot_longer(cols = c("v2cademmob_osp", "v2caautmob_osp") ) %>% 
  mutate(country_name = str_replace_all(country_name, "/", "-")) #needed for loop

# Plot timelines for all countries
mylist <- list()

for (i in unique(data_long$country_name)) {
  
  g <- ggplot(data = data_long %>% filter(country_name == i),
              aes(x = year, y = value, group = name, color = name,
                  linetype = name)) +
    geom_line(size = .8) +
    geom_point(size = .5) +
    scale_y_continuous("Mobilization", limits = c(0,4), breaks = c(0,1,2,3,4), labels = c(0,1,2,3,4)) +
    scale_x_continuous("", limits = c(1900, 2021), breaks = seq(1900,2020,10), labels = seq(1900,2020,10)) +
    scale_color_manual("",breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                       values = c("#08519C", "#A50F15"),
                       labels = c("Pro-democracy", "Pro-autocracy")) +
    scale_linetype_manual("",breaks = c("v2cademmob_osp", "v2caautmob_osp"),
                          values = c("solid", "dashed"),
                          labels = c("Pro-democracy", "Pro-autocracy")) +
    theme(legend.position = "bottom",
          axis.text.x = element_text(angle = 45, hjust = 1)) +
    ggtitle(sprintf(i))
  
  ggsave(paste0("Output/Appendix C/",i,".pdf"),
         width = 7,
         height = 5,
         dpi = "retina") 
  
  mylist[[i]] <- ggplotGrob(g)
  
}

# save all plots in one file
ggsave(
  filename = "Output/country_plots.pdf", 
  plot = marrangeGrob(mylist, nrow = 3, ncol = 2), 
  width = 210, height = 297, units = "mm"
)
